In [1]:
import pandas as pd
import numpy as np

# Modeling
import statsmodels.api as sm
from pygam import GAM, ExpectileGAM, s, l, f
from sklearn.preprocessing import LabelEncoder

# Visualization
import plotly.express as px
from plotnine import *
import textwrap

import warnings
warnings.filterwarnings('ignore')
In [2]:
data = pd.read_csv(r"C:\Users\arham\Desktop\lab_87_price_optimization_python\data\price_optimization.csv")
In [3]:
data
Out[3]:
price quantity_sold product event
0 12.16 1208 Standard Case | iPhone 15 Pro Max No Promo
1 33.87 1299 Standard Case | iPhone 15 Pro Max Black Friday
2 23.04 933 Standard Case | iPhone 15 Pro Max No Promo
3 20.45 572 Standard Case | iPhone 15 Pro Max New iPhone
4 17.08 1170 Standard Case | iPhone 15 Pro Max Christmas
... ... ... ... ...
795 60.05 127 Premium Case | iPhone 15 Pro New iPhone
796 68.33 175 Premium Case | iPhone 15 Pro No Promo
797 72.69 457 Premium Case | iPhone 15 Pro Black Friday
798 54.28 63 Premium Case | iPhone 15 Pro New iPhone
799 84.60 196 Premium Case | iPhone 15 Pro No Promo

800 rows × 4 columns

In [4]:
data.describe(include='all').T
Out[4]:
count unique top freq mean std min 25% 50% 75% max
price 800.0 NaN NaN NaN 40.911325 20.980488 5.19 24.365 39.825 55.37 89.98
quantity_sold 800.0 NaN NaN NaN 632.9025 371.748974 0.0 283.0 682.5 934.75 1575.0
product 800 4 Standard Case | iPhone 15 Pro Max 200 NaN NaN NaN NaN NaN NaN NaN
event 800 5 No Promo 560 NaN NaN NaN NaN NaN NaN NaN
In [5]:
# 2.0 DATA PREPARATION
# --------------------

data['is_event'] = (data['event'] != "No Promo").astype(int).astype(str)

data['revenue'] = data['price'] * data['quantity_sold']

data
Out[5]:
price quantity_sold product event is_event revenue
0 12.16 1208 Standard Case | iPhone 15 Pro Max No Promo 0 14689.28
1 33.87 1299 Standard Case | iPhone 15 Pro Max Black Friday 1 43997.13
2 23.04 933 Standard Case | iPhone 15 Pro Max No Promo 0 21496.32
3 20.45 572 Standard Case | iPhone 15 Pro Max New iPhone 1 11697.40
4 17.08 1170 Standard Case | iPhone 15 Pro Max Christmas 1 19983.60
... ... ... ... ... ... ...
795 60.05 127 Premium Case | iPhone 15 Pro New iPhone 1 7626.35
796 68.33 175 Premium Case | iPhone 15 Pro No Promo 0 11957.75
797 72.69 457 Premium Case | iPhone 15 Pro Black Friday 1 33219.33
798 54.28 63 Premium Case | iPhone 15 Pro New iPhone 1 3419.64
799 84.60 196 Premium Case | iPhone 15 Pro No Promo 0 16581.60

800 rows × 6 columns

In [6]:
# TRENDS: PRICE vs QUANTITY SOLD
fig = px.scatter(
    data,
    x='price',
    y='quantity_sold',
    color='product',
    opacity=0.6, 
    trendline="lowess",  
    trendline_color_override="blue",
    template="none",
    title='Product Sales: Price vs Quantity Analysis',
    width=800, 
    height=600,
).update_traces(
    marker=dict(size=7),
    hoverlabel=dict(font=dict(size=10)),
).update_layout(
    legend_title_text='Product',
    title_font=dict(size=16),
    legend_font=dict(size=10),
).update_xaxes(
    title_text='Price',
    title_font=dict(size=10), 
    tickfont=dict(size=10),
).update_yaxes(
    title_text='Quantity Sold',
    title_font=dict(size=10), 
    tickfont=dict(size=10),
)

fig
In [7]:
# 4.0 EVENT ANALYSIS (OUTLIERS)
# ----------------------------

# SCATTER PLOT
fig = px.scatter(
    data,
    x='price',
    y='quantity_sold',
    color='event',
    facet_col='product',
    facet_col_wrap=2,
    facet_col_spacing=0.1,
    facet_row_spacing=0.1,
    opacity=0.6, 
    trendline="lowess",  
    trendline_color_override="blue",
    template="none",
    title='Product Sales: Event Analysis',
    width=800, 
    height=700,
).update_traces(
    marker=dict(size=7),
    hoverlabel=dict(font=dict(size=10)),
).update_layout(
    legend_title_text='Product',
    title_font=dict(size=16),
    legend_font=dict(size=10),
).update_xaxes(
    title_text='Price',
    title_font=dict(size=10), 
    tickfont=dict(size=10),
    matches=None,
).update_yaxes(
    title_text='Quantity Sold',
    title_font=dict(size=10), 
    tickfont=dict(size=10),
    matches=None,
)

for annotation in fig['layout']['annotations']: 
    annotation['font'] = dict(size=10) 
    
fig.for_each_xaxis(lambda axis: axis.update(showticklabels=True))
fig.for_each_yaxis(lambda axis: axis.update(showticklabels=True))

fig
In [8]:
# 6.0 GENERAL PRICE OPTIMIZATION: GAMs
# ----------------------------------
# PROBLEM: Prices are non-linear
# SOLUTION: Use GAMs (Generalized Additive Models)
# NOTES:
# - GAMs are like Linear Regression, but allow for non-linear relationships
# - NOT as useful for incorporating events (see next section)

# Keep the entire data as data_filtered
data_filtered = data.query('event == "No Promo"')

# Create a list of unique products
unique_products = data_filtered['product'].unique()

# Create an empty dataframe to store the concatenated results
all_gam_results = pd.DataFrame()

# Loop through each product
for product in unique_products:
    # Filter data for the current product
    product_data = data_filtered[data_filtered['product'] == product]
    
    X = product_data[['price']]
    y = product_data['quantity_sold']
    
    quantiles = [0.025, 0.5, 0.975]
    gam_results = {}

    # Fit the GAM model for the filtered data
    for q in quantiles:
        gam = ExpectileGAM(s(0), expectile=q)
        gam.fit(X, y)
        gam_results[f"pred_{q}"] = gam.predict(X)
    
    # Store the results in a dataframe with index that matches the original data
    predictions_gam = pd.DataFrame(gam_results).set_index(X.index)
    
    # Concatenate the results column-wise with the original data
    predictions_gam_df = pd.concat([product_data[['price', 'product', 'quantity_sold']], predictions_gam], axis=1)
    
    # Concatenate results row-wise
    all_gam_results = pd.concat([all_gam_results, predictions_gam_df], axis=0)

all_gam_results
Out[8]:
price product quantity_sold pred_0.025 pred_0.5 pred_0.975
0 12.16 Standard Case | iPhone 15 Pro Max 1208 936.044699 1056.475514 1154.146510
2 23.04 Standard Case | iPhone 15 Pro Max 933 873.571104 956.955566 1052.298391
5 31.41 Standard Case | iPhone 15 Pro Max 825 764.526030 855.147747 973.488884
6 15.92 Standard Case | iPhone 15 Pro Max 956 891.101051 1007.301043 1130.320181
8 10.12 Standard Case | iPhone 15 Pro Max 1180 966.403722 1076.775625 1158.523702
... ... ... ... ... ... ...
792 73.72 Premium Case | iPhone 15 Pro 140 109.223598 179.842878 249.371987
793 63.27 Premium Case | iPhone 15 Pro 141 142.335381 231.497810 307.703883
794 83.75 Premium Case | iPhone 15 Pro 151 57.256986 155.666478 247.043239
796 68.33 Premium Case | iPhone 15 Pro 175 118.528252 207.001590 273.722289
799 84.60 Premium Case | iPhone 15 Pro 196 52.817411 153.299060 242.991816

560 rows × 6 columns

In [9]:
from plotnine import ggplot, aes, geom_ribbon, geom_point, geom_line, facet_wrap, labs, scale_color_manual, theme_minimal

# Define the plot using ggplot
plot = (
    ggplot(data=all_gam_results, mapping=aes(x='price', y='quantity_sold', color='product', group='product')) +
    geom_ribbon(aes(ymax="pred_0.975", ymin="pred_0.025"), fill="#d3d3d3", color="#000000", alpha=0.75, show_legend=False) +
    geom_point(alpha=0.5) +
    geom_line(aes(y="pred_0.5"), color="blue") +
    facet_wrap('product', scales="free") +
    labs(title="GAM Price vs Quantity Model") +
    scale_color_manual(values=['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf']) +
    theme_minimal() +  # Using a minimal theme for simplicity
    theme(figure_size=(10, 6))  # Adjust the figure size here (width, height)
)

# Print the plot
plot.draw()
Out[9]:
In [10]:
# Optimize Price for Predicted Daily Revenue

for col in all_gam_results.columns:
    if col.startswith('pred'):
        all_gam_results['revenue_' + col] =all_gam_results['price'] * all_gam_results[col]

all_gam_results['revenue_actual'] = all_gam_results['price'] * all_gam_results['quantity_sold']

all_gam_results
Out[10]:
price product quantity_sold pred_0.025 pred_0.5 pred_0.975 revenue_pred_0.025 revenue_pred_0.5 revenue_pred_0.975 revenue_actual
0 12.16 Standard Case | iPhone 15 Pro Max 1208 936.044699 1056.475514 1154.146510 11382.303543 12846.742255 14034.421558 14689.28
2 23.04 Standard Case | iPhone 15 Pro Max 933 873.571104 956.955566 1052.298391 20127.078235 22048.256240 24244.954919 21496.32
5 31.41 Standard Case | iPhone 15 Pro Max 825 764.526030 855.147747 973.488884 24013.762591 26860.190736 30577.285841 25913.25
6 15.92 Standard Case | iPhone 15 Pro Max 956 891.101051 1007.301043 1130.320181 14186.328733 16036.232608 17994.697287 15219.52
8 10.12 Standard Case | iPhone 15 Pro Max 1180 966.403722 1076.775625 1158.523702 9780.005669 10896.969322 11724.259860 11941.60
... ... ... ... ... ... ... ... ... ... ...
792 73.72 Premium Case | iPhone 15 Pro 140 109.223598 179.842878 249.371987 8051.963618 13258.016951 18383.702853 10320.80
793 63.27 Premium Case | iPhone 15 Pro 141 142.335381 231.497810 307.703883 9005.559573 14646.866464 19468.424667 8921.07
794 83.75 Premium Case | iPhone 15 Pro 151 57.256986 155.666478 247.043239 4795.272564 13037.067509 20689.871250 12646.25
796 68.33 Premium Case | iPhone 15 Pro 175 118.528252 207.001590 273.722289 8099.035453 14144.418672 18703.444008 11957.75
799 84.60 Premium Case | iPhone 15 Pro 196 52.817411 153.299060 242.991816 4468.352979 12969.100509 20557.107611 16581.60

560 rows × 10 columns

In [11]:
best_50 = all_gam_results \
    .groupby('product') \
    .apply(lambda x: x[x['revenue_pred_0.5'] == x['revenue_pred_0.5'].max()].head(1)) \
    .reset_index(level=0, drop=True)
    
best_975 = all_gam_results \
    .groupby('product') \
    .apply(lambda x: x[x['revenue_pred_0.975'] == x['revenue_pred_0.975'].max()].head(1)) \
    .reset_index(level=0, drop=True)
    
best_025 = all_gam_results \
    .groupby('product') \
    .apply(lambda x: x[x['revenue_pred_0.025'] == x['revenue_pred_0.025'].max()].head(1)) \
    .reset_index(level=0, drop=True)
In [12]:
best_50
Out[12]:
price product quantity_sold pred_0.025 pred_0.5 pred_0.975 revenue_pred_0.025 revenue_pred_0.5 revenue_pred_0.975 revenue_actual
681 57.59 Premium Case | iPhone 15 Pro 310 187.930913 281.136183 332.759844 10822.941290 16190.632764 19163.639437 17852.90
249 69.70 Premium Case | iPhone 15 Pro Max 665 599.112739 651.307076 692.913329 41758.157914 45396.103224 48296.058997 46350.50
501 20.96 Standard Case | iPhone 15 Pro 680 485.631098 602.293478 758.898625 10178.827809 12624.071301 15906.515173 14252.80
126 34.98 Standard Case | iPhone 15 Pro Max 811 687.317784 783.531007 842.462477 24042.376077 27407.914622 29469.337448 28368.78
In [13]:
best_975
Out[13]:
price product quantity_sold pred_0.025 pred_0.5 pred_0.975 revenue_pred_0.025 revenue_pred_0.5 revenue_pred_0.975 revenue_actual
642 41.15 Premium Case | iPhone 15 Pro 346 219.673587 369.491219 505.314915 9039.568122 15204.563667 20793.708746 14237.90
217 55.66 Premium Case | iPhone 15 Pro Max 766 716.977611 795.179316 875.313716 39906.973805 44259.680755 48719.961437 42635.56
509 34.58 Standard Case | iPhone 15 Pro 300 208.717309 343.981528 473.870138 7217.444531 11894.881238 16386.429383 10374.00
162 31.90 Standard Case | iPhone 15 Pro Max 875 755.156892 846.144311 959.223021 24089.504858 26992.003506 30599.214360 27912.50
In [14]:
best_025
Out[14]:
price product quantity_sold pred_0.025 pred_0.5 pred_0.975 revenue_pred_0.025 revenue_pred_0.5 revenue_pred_0.975 revenue_actual
713 57.17 Premium Case | iPhone 15 Pro 348 189.435146 283.062658 334.627552 10830.007280 16182.692166 19130.657121 19895.16
249 69.70 Premium Case | iPhone 15 Pro Max 665 599.112739 651.307076 692.913329 41758.157914 45396.103224 48296.058997 46350.50
510 12.10 Standard Case | iPhone 15 Pro 914 915.956078 1018.696826 1121.613888 11083.068541 12326.231598 13571.528040 11059.40
60 32.77 Standard Case | iPhone 15 Pro Max 804 737.025413 828.818903 930.246918 24152.322768 27160.395455 30484.191489 26347.08
In [15]:
from plotnine import ggplot, aes, geom_ribbon, geom_point, geom_line, facet_wrap, labs, scale_color_manual, theme_minimal

# Define the plot using ggplot
plot = (
    ggplot(data=all_gam_results, mapping=aes(x='price', y='revenue_pred_0.5', color='product', group='product')) +
    geom_ribbon(aes(ymax='revenue_pred_0.975', ymin='revenue_pred_0.025'), fill='#d3d3d3', color='#000000', alpha=0.5, show_legend=False) +
    geom_point(aes(y='revenue_actual'), alpha=0.15, color="#2C3E50") +
    geom_line(alpha=0.5) +
    geom_point(data=best_50, color='red') +
    geom_point(data=best_975, mapping=aes(y='revenue_pred_0.975'), color='blue') +
    geom_point(data=best_025, mapping=aes(y='revenue_pred_0.025'), color='blue') +
    facet_wrap('product', scales='free') +
    labs(
        title='iPhone Case Price Optimization',
        subtitle='Maximum Median Revenue (Red) vs 95% Maximum Confidence Interval (Blue)',
        x='Price',
        y='Predicted Revenue'
    ) +
    scale_color_manual(values=['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf']) +
    theme_minimal() +
    theme(figure_size=(10, 6))  # Adjust the figure size here (width, height)
)

# Print the plot
plot.draw()
Out[15]:
In [16]:
best_50[['product', 'price', 'revenue_pred_0.5', 'revenue_pred_0.025', 'revenue_pred_0.975']]
Out[16]:
product price revenue_pred_0.5 revenue_pred_0.025 revenue_pred_0.975
681 Premium Case | iPhone 15 Pro 57.59 16190.632764 10822.941290 19163.639437
249 Premium Case | iPhone 15 Pro Max 69.70 45396.103224 41758.157914 48296.058997
501 Standard Case | iPhone 15 Pro 20.96 12624.071301 10178.827809 15906.515173
126 Standard Case | iPhone 15 Pro Max 34.98 27407.914622 24042.376077 29469.337448
In [17]:
# MODELING GAMS WITH EVENTS:
# --------------------------
# Essentially the same process as above, but we need to filter out the "No Promo" events
# Gets a little tricky because of limited data points for each event

# Keep the entire data as data_filtered
data_filtered = data.query('event != "No Promo"')
data_filtered
Out[17]:
price quantity_sold product event is_event revenue
1 33.87 1299 Standard Case | iPhone 15 Pro Max Black Friday 1 43997.13
3 20.45 572 Standard Case | iPhone 15 Pro Max New iPhone 1 11697.40
4 17.08 1170 Standard Case | iPhone 15 Pro Max Christmas 1 19983.60
7 13.65 699 Standard Case | iPhone 15 Pro Max New iPhone 1 9541.35
10 19.46 1094 Standard Case | iPhone 15 Pro Max The Big Game 1 21289.24
... ... ... ... ... ... ...
787 40.47 239 Premium Case | iPhone 15 Pro New iPhone 1 9672.33
791 63.29 330 Premium Case | iPhone 15 Pro Black Friday 1 20885.70
795 60.05 127 Premium Case | iPhone 15 Pro New iPhone 1 7626.35
797 72.69 457 Premium Case | iPhone 15 Pro Black Friday 1 33219.33
798 54.28 63 Premium Case | iPhone 15 Pro New iPhone 1 3419.64

240 rows × 6 columns

In [18]:
# Create a list of unique products and events
unique_products = data_filtered['product'].unique()

events_only_gam_results = pd.DataFrame()

gam_results = {}
In [19]:
# Loop through each product
for product in unique_products:
        
    # Filter data for the current product and event
    product_event_data = data_filtered[(data_filtered['product'] == product)]
    
    if len(product_event_data) == 0:
        continue  # skip to next iteration if no data for current product-event combination

    X = product_event_data[['price', 'event']]
    y = product_event_data['quantity_sold']
    
    # NEW: Encode the event column
    le = LabelEncoder()
    X['event'] = le.fit_transform(X['event'])
    
    # NEW: use f(1) to indicate that the event column is categorical
    gam = GAM(l(0) + f(1))
    
    gam.fit(X, y)
    gam_results[f"pred_0.5"] = gam.predict(X)
    
    # Store the results in a dataframe with index that matches the original data
    predictions_gam = pd.DataFrame(gam_results).set_index(X.index)
    
    # Concatenate the results column-wise with the original data
    predictions_gam_df = pd.concat([product_event_data[['price', 'product', 'event', 'quantity_sold']], predictions_gam], axis=1)
    
    # Concatenate results row-wise
    events_only_gam_results = pd.concat([events_only_gam_results, predictions_gam_df], axis=0)

events_only_gam_results
Out[19]:
price product event quantity_sold pred_0.5
1 33.87 Standard Case | iPhone 15 Pro Max Black Friday 1299 1214.774560
3 20.45 Standard Case | iPhone 15 Pro Max New iPhone 572 603.148859
4 17.08 Standard Case | iPhone 15 Pro Max Christmas 1170 1155.482759
7 13.65 Standard Case | iPhone 15 Pro Max New iPhone 699 684.142593
10 19.46 Standard Case | iPhone 15 Pro Max The Big Game 1094 1088.429983
... ... ... ... ... ...
787 40.47 Premium Case | iPhone 15 Pro New iPhone 239 273.187263
791 63.29 Premium Case | iPhone 15 Pro Black Friday 330 343.007394
795 60.05 Premium Case | iPhone 15 Pro New iPhone 127 144.010080
797 72.69 Premium Case | iPhone 15 Pro Black Friday 457 280.991790
798 54.28 Premium Case | iPhone 15 Pro New iPhone 63 182.077105

240 rows × 5 columns

In [20]:
for col in events_only_gam_results.columns:
    if col.startswith('pred'):
        events_only_gam_results['revenue_' + col] =events_only_gam_results['price'] * events_only_gam_results[col]

events_only_gam_results

best_50 = events_only_gam_results \
    .groupby(['product', 'event']) \
    .apply(lambda x: x[x['revenue_pred_0.5'] == x['revenue_pred_0.5'].max()].head(1)) \
    .reset_index(level=0, drop=True)
In [21]:
best_50
Out[21]:
price product event quantity_sold pred_0.5 revenue_pred_0.5
event
Black Friday 779 55.49 Premium Case | iPhone 15 Pro Black Friday 331 394.467150 21888.982171
Christmas 684 54.01 Premium Case | iPhone 15 Pro Christmas 273 399.065156 21553.509072
New iPhone 787 40.47 Premium Case | iPhone 15 Pro New iPhone 239 273.187263 11055.888550
The Big Game 782 47.07 Premium Case | iPhone 15 Pro The Big Game 255 382.429685 18000.965251
Black Friday 398 68.74 Premium Case | iPhone 15 Pro Max Black Friday 898 929.123711 63867.963920
Christmas 307 67.51 Premium Case | iPhone 15 Pro Max Christmas 724 780.239020 52673.936216
New iPhone 323 42.54 Premium Case | iPhone 15 Pro Max New iPhone 448 492.661971 20957.840234
The Big Game 252 65.68 Premium Case | iPhone 15 Pro Max The Big Game 776 752.978053 49455.598490
Black Friday 500 27.75 Standard Case | iPhone 15 Pro Black Friday 706 669.376975 18575.211068
Christmas 466 26.50 Standard Case | iPhone 15 Pro Christmas 400 635.101193 16830.181602
New iPhone 589 20.05 Standard Case | iPhone 15 Pro New iPhone 287 519.722904 10420.444231
The Big Game 482 24.38 Standard Case | iPhone 15 Pro The Big Game 572 561.684630 13693.871284
Black Friday 96 34.46 Standard Case | iPhone 15 Pro Max Black Friday 1347 1207.747162 41618.967204
Christmas 31 17.85 Standard Case | iPhone 15 Pro Max Christmas 1191 1146.311409 20461.658656
New iPhone 172 30.99 Standard Case | iPhone 15 Pro Max New iPhone 279 477.608573 14801.089670
The Big Game 19 32.57 Standard Case | iPhone 15 Pro Max The Big Game 825 932.278830 30364.321485
In [ ]: